import scrapy
from bs4 import BeautifulSoup
import sys
# Python 2 shim: force the process-wide default encoding to UTF-8 so implicit
# str<->unicode conversions don't raise UnicodeDecodeError.  reload() and
# sys.setdefaultencoding() do not exist in Python 3 (where the default is
# already UTF-8), so the hack must be guarded by interpreter version or the
# module fails with NameError at import time on Python 3.
if sys.version_info[0] == 2:
    reload(sys)
    sys.setdefaultencoding('utf-8')


class huabanSpider(scrapy.Spider):
    """Download Douban note pages and dump each raw HTML body to a numbered
    ``<index>.txt`` file in the current working directory.

    NOTE(review): the class is named ``huabanSpider`` but crawls douban.com
    under spider name ``douban`` — presumably copied from a huaban project;
    confirm before renaming anything callers reference.
    """

    name = 'douban'
    allowed_domains = ["douban.com"]
    # Response statuses the author apparently intended to still process;
    # currently unused by any visible code.
    allow_status = ['500', '400', '404', '403']
    index_num = 1          # counter used to name output files (shared start value)
    end_num = 41662204     # highest note id for the (commented-out) full crawl

    # BUG FIX: the original assigned DOWNLOADER_MIDDLEWARES as a plain class
    # attribute — and as a *set* with no priorities — which Scrapy silently
    # ignores.  Per-spider settings must live in `custom_settings`, and
    # DOWNLOADER_MIDDLEWARES must map middleware class path -> priority int.
    custom_settings = {
        "DOWNLOADER_MIDDLEWARES": {
            "Spider.middlewares.RandomUserAgent": 543,
            "Spider.middlewares.ProxyMiddleware": 544,
        },
    }

    def start_requests(self):
        """Yield the initial request(s).

        The ranged crawl over every note id up to ``end_num`` is left
        commented out; only a single sample note is fetched.
        """
        # for i in range(1, self.end_num):
        #     yield scrapy.Request('https://www.douban.com/note/%d/' % i)
        yield scrapy.Request('https://www.douban.com/note/272862028/')

    def parse(self, response):
        """Persist the raw body of a successfully fetched page to disk.

        The counter increments for *every* response, including non-200 ones,
        so output file numbers may have gaps.
        """
        self.index_num += 1
        if response.status == 200:
            # Binary mode: response.body is bytes; the original opened the
            # file in text mode and leaked the handle if write() raised.
            # (The unused BeautifulSoup parse of the body was dead code and
            # has been removed; the `bs4` import at the top is untouched.)
            with open(str(self.index_num) + ".txt", "wb") as fp:
                fp.write(response.body)
